#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Load packages
library(plyr)
library(data.table)
library(here)
library(magrittr)

## Read and merge
# List the completed generator files. `pattern` is a regular expression (not a
# shell glob), so match the literal ".rds" extension at the end of the name.
eia_names <- list.files(
  path = here("data", "inter", "eiagenerator", "completed"),
  pattern = "\\.rds$",
  full.names = TRUE,
  recursive = FALSE
)

# Keep only files for the listed years. Match on the file name alone so that
# digits elsewhere in the absolute path cannot accidentally select every file.
eia_names <- eia_names[
  grepl("1992|1996|2000|2004|2008|2012|2016|2020", basename(eia_names))
]
if (length(eia_names) == 0) {
  stop("No EIA generator .rds files found for the requested years.",
       call. = FALSE)
}

# Read every file and stack them, matching columns by name and padding columns
# that are missing from some files with NA.
eia_list <- lapply(eia_names, readRDS)
eia <- rbindlist(eia_list, use.names = TRUE, fill = TRUE)

# NOTE(review): this drops the 16th column by position; which column that is
# cannot be determined from this script. Dropping it by name would be safer.
eia <- eia[, -16]

# Standardize the key columns: numeric lower-case `fips` and `year`.
eia$fips <- as.numeric(eia$FIPS)
eia$FIPS <- NULL
eia <- eia %>% dplyr::rename(year = Year)
eia$year <- as.numeric(eia$year)

# Summary statistics of NewGasDistance after multiplying by 0.000621371
# (the metres-to-miles factor, so this presumably reports miles -- confirm the
# units of NewGasDistance).
summary(0.000621371 * eia$NewGasDistance)

# Diagnostic: per year, count the rows whose standardized NewGasDistance lies
# more than 2.5 standard deviations above the mean, largest counts first.
eia %>%
  subset(scale(NewGasDistance)[, 1] > 2.5) %>%
  dplyr::group_by(year) %>%
  dplyr::count() %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  print(n = 50)

# Recode Miami-Dade and Oglala Lakota so each year carries the FIPS code the
# rest of this script expects for that period.
# * Florida, 1997: Dade County (FIPS 12025) is renamed Miami-Dade County
#   (FIPS 12086), so rows before 1997 are moved back to 12025.
# * South Dakota, 2015: Shannon County (FIPS 46113) is renamed Oglala Lakota
#   County (FIPS 46102), so rows from 2016 onward are moved to 46102.
# which() drops NA comparisons and makes the assignment a harmless no-op when
# no row matches.
eia$fips[which(eia$fips == 12086 & eia$year < 1997)] <- 12025
eia$fips[which(eia$fips == 46113 & eia$year >= 2016)] <- 46102

eia <- data.frame(eia)

# Drop the rows for the two recoded counties that have no NewGasDistance.
# A logical keep-mask is used instead of `-which(...)`: when nothing matches,
# `-integer(0)` selects zero rows and would silently empty the whole data set.
# `%in%` and is.na() never return NA, so the mask is always TRUE/FALSE.
eia <- eia[
  !(eia$fips %in% c(12025, 46102) & is.na(eia$NewGasDistance)),
  ,
  drop = FALSE
]

# Keep only the year, gas, and county-identifier columns; the columns for other
# energy types belong to a separate project.
eia <- eia[, c("year", "Gas", "NewGas", "NewGasDistance", "fips")]

# Write the final data set to data/inter/eiagenerator/eia_data_final.rds
# (path resolved relative to the project root by here()).
saveRDS(
  object = eia,
  file = here("data", "inter", "eiagenerator", "eia_data_final.rds")
)
